nowiki, documentation
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 require_once ( "Parser.php" ) ;
3
4 /**
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
7 * To use, set
8 * $wgUseXMLparser = true ;
9 * $wgEnableParserCache = false ;
10 * $wgWiki2xml to the path and executable of the command line version (cli)
11 * in LocalSettings.php
12 * @package MediaWiki
13 * @subpackage Experimental
14 */
15
16 /**
17 * the base class for an element
18 */
/**
 * One node of the wikiXML tree built by the wgXML* handler functions.
 *
 * The rendering code compares element and attribute names against
 * upper-case literals ("ARTICLE", "NAME", ...).
 */
class element {
	/** Element name, e.g. "PARAGRAPH" */
	var $name = '';
	/** XML attributes of this element, keyed by attribute name */
	var $attrs = array();
	/** Ordered list of children: plain strings (text) or element objects */
	var $children = array();

	/**
	 * This finds the ATTRS child element and returns its ATTR sub-children
	 * as a single, ready-to-emit attribute string (see getTheseAttrs()).
	 * If there are several ATTRS children, the last one wins; without any,
	 * an empty string is returned.
	 */
	function getSourceAttrs ()
	{
		$ret = "" ;
		foreach ( $this->children as $child )
		{
			if ( !is_string ( $child ) && $child->name == "ATTRS" )
			{
				# Ask the ATTRS element directly; no parser object is
				# needed to serialise attributes.
				$ret = $child->getTheseAttrs () ;
			}
		}
		return $ret ;
	}

	/**
	 * This collects the ATTR children of an ATTRS element and returns them
	 * as "name='value' name2='value2'".
	 *
	 * Values are HTML-escaped (including quotes) because the XML parser has
	 * already decoded entities. ATTR children without a NAME, or with a
	 * NAME that is not a plain attribute name, are skipped so that they
	 * cannot break out of the generated tag.
	 */
	function getTheseAttrs ()
	{
		$ret = array() ;
		foreach ( $this->children as $child )
		{
			if ( is_string ( $child ) || $child->name != "ATTR" ) continue ;
			if ( !isset ( $child->attrs["NAME"] ) ) continue ;

			$attrName = $child->attrs["NAME"] ;
			if ( !preg_match ( '/^[A-Za-z_:][-A-Za-z0-9_:.]*\z/' , $attrName ) ) continue ;

			# The value may be empty or spread over several text chunks
			$value = "" ;
			foreach ( $child->children as $part )
			{
				if ( is_string ( $part ) ) $value .= $part ;
			}
			$ret[] = $attrName . "='" . htmlspecialchars ( $value , ENT_QUOTES ) . "'" ;
		}
		return implode ( " " , $ret ) ;
	}

	/**
	 * This function generates the XHTML for the entire subtree.
	 *
	 * @param $parser object passed through to makeXHTML() of the children
	 * @param $tag    name of the XHTML tag to wrap the children in; "" for no wrapper
	 * @param $attr   attribute string to put into the opening tag; the
	 *                attributes found by getSourceAttrs() are appended
	 * @return string
	 */
	function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" )
	{
		$ret = "" ;

		$attr2 = $this->getSourceAttrs () ;
		if ( $attr != "" && $attr2 != "" ) $attr .= " " ;
		$attr .= $attr2 ;

		if ( $tag != "" )
		{
			$ret .= "<" . $tag ;
			if ( $attr != "" ) $ret .= " " . $attr ;
			$ret .= ">" ;
		}

		foreach ( $this->children as $child )
		{
			if ( is_string ( $child ) )
			{
				# Text arrives with its XML entities already decoded, so
				# it has to be escaped again for the XHTML output.
				$ret .= htmlspecialchars ( $child , ENT_NOQUOTES ) ;
			}
			else if ( $child->name != "ATTRS" ) # ATTRS went into the opening tag
			{
				$ret .= $child->makeXHTML ( $parser ) ;
			}
		}

		if ( $tag != "" )
			$ret .= "</" . $tag . ">\n" ;
		return $ret ;
	}

	/**
	 * Placeholder for real link generation: currently only returns one of
	 * the marker strings "{language link}", "{namespace link}" or
	 * "{internal link}". Splitting the target into language, namespace and
	 * '#' parts is not implemented yet, so at present the result is always
	 * "{internal link}".
	 */
	function createInternalLink ( &$parser , $target , $display_title , $options )
	{
		$tp = explode ( ":" , $target ) ; # tp = target parts
		$title = "" ;     # The plain title
		$language = "" ;  # The language/meta/etc. part
		$namespace = "" ; # The namespace, if any
		$subtarget = "" ; # The '#' thingy
		if ( count ( $tp ) == 1 ) $title = $target ; # Plain and simple case
		else
		{
			# To be implemented
		}

		if ( $language != "" ) # External link within the WikiMedia project
		{
			return "{language link}" ;
		}
		else if ( $namespace != "" ) # Link to another namespace, check for image/media stuff
		{
			return "{namespace link}" ;
		}
		else
		{
			return "{internal link}" ;
		}
	}

	/**
	 * Renders a LINK element: the LINKTARGET child becomes the target, all
	 * other element children become options, and the last option is used
	 * as the display title (falling back to the target itself).
	 */
	function makeInternalLink ( &$parser )
	{
		$target = "" ;
		$option = array () ;
		foreach ( $this->children as $child )
		{
			if ( is_string ( $child ) ) continue ; # This shouldn't be the case!

			if ( $child->name == "LINKTARGET" )
				$target = trim ( $child->makeXHTML ( $parser ) ) ;
			else
				$option[] = trim ( $child->makeXHTML ( $parser ) ) ;
		}

		if ( count ( $option ) == 0 ) $option[] = $target ; # Create dummy display title
		$display_title = array_pop ( $option ) ;
		return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
	}

	/**
	 * This function actually converts wikiXML into XHTML tags.
	 *
	 * The element itself is not modified, so calling this repeatedly on
	 * the same tree gives the same result.
	 *
	 * @param $parser object whose "nowiki" property is used as the nesting
	 *                counter for NOWIKI sections; a missing property
	 *                counts as 0
	 * @return string XHTML wrapped in single newlines; for an ATTRS element
	 *                the bare attribute string from getTheseAttrs()
	 */
	function makeXHTML ( &$parser )
	{
		$ret = "" ;
		$n = $this->name ; # Shortcut

		if ( $n == "EXTENSION" ) # Fix allowed HTML
		{
			# HTML tag name => wiki element it is rendered as
			$htmlToWiki = array (
				"B" => "BOLD" ,
				"STRONG" => "BOLD" ,
				"I" => "ITALICS" ,
				"EM" => "ITALICS" ,
				"U" => "UNDERLINED" , # Hey, virtual wiki tag! ;-)
				"S" => "STRIKE" ,
				"P" => "PARAGRAPH" ,
				"TABLE" => "TABLE" ,
				"TR" => "TABLEROW" ,
				"TD" => "TABLECELL" ,
				"TH" => "TABLEHEAD" ,
				"CAPTION" => "CAPTION" ,
				"NOWIKI" => "NOWIKI" ,
			) ;
			$ext = isset ( $this->attrs["NAME"] ) ? strtoupper ( $this->attrs["NAME"] ) : "" ;
			$nowikiDepth = isset ( $parser->nowiki ) ? $parser->nowiki : 0 ;

			if ( isset ( $htmlToWiki[$ext] ) ) $n = $htmlToWiki[$ext] ;
			# NOTE(review): inside a nowiki section an unrecognised
			# extension ends up in the "something else" branch below with
			# an empty name - confirm that this is the intended output.
			else if ( $nowikiDepth > 0 ) $n = "" ; # No "real" wiki tags allowed
		}

		if ( $n == "ATTRS" ) # SPECIAL CASE : returning attributes
		{
			return $this->getTheseAttrs () ;
		}

		# Elements that simply wrap their children in one tag ("" = no tag)
		$plainTags = array (
			"ARTICLE" => "" ,
			"PARAGRAPH" => "p" ,
			"BOLD" => "strong" ,
			"ITALICS" => "em" ,
			# These don't exist as wiki markup
			"UNDERLINED" => "u" ,
			"STRIKE" => "strike" ,
			# Link parts
			"LINKTARGET" => "" ,
			"LINKOPTION" => "" ,
			# Table stuff
			"TABLE" => "table" ,
			"TABLEROW" => "tr" ,
			"TABLECELL" => "td" ,
			"TABLEHEAD" => "th" ,
			"CAPTION" => "caption" ,
			# Lists
			"LISTITEM" => "li" ,
		) ;

		if ( isset ( $plainTags[$n] ) )
		{
			$ret .= $this->sub_makeXHTML ( $parser , $plainTags[$n] ) ;
		}
		else if ( $n == "HEADING" )
		{
			# Only h1..h6 exist; never let the raw attribute into the tag name
			$level = isset ( $this->attrs["LEVEL"] ) ? intval ( $this->attrs["LEVEL"] ) : 1 ;
			$level = max ( 1 , min ( 6 , $level ) ) ;
			$ret .= $this->sub_makeXHTML ( $parser , "h" . $level ) ;
		}
		else if ( $n == "LINK" )
		{
			$ret .= $this->makeInternalLink ( $parser ) ;
		}
		else if ( $n == "NOWIKI" )
		{
			if ( !isset ( $parser->nowiki ) ) $parser->nowiki = 0 ;
			$parser->nowiki++ ;
			$ret .= $this->sub_makeXHTML ( $parser , "" ) ;
			$parser->nowiki-- ;
		}
		else if ( $n == "LIST" )
		{
			$type = "ol" ; # Default
			if ( isset ( $this->attrs["TYPE"] ) && $this->attrs["TYPE"] == "bullet" ) $type = "ul" ;
			$ret .= $this->sub_makeXHTML ( $parser , $type ) ;
		}
		else if ( $n == "EXTENSION" ) # Unknown HTML extension. This is currently a dummy!!!
		{
			# Shown as visible text, so the name must be escaped
			$ext = isset ( $this->attrs["NAME"] ) ? htmlspecialchars ( $this->attrs["NAME"] , ENT_QUOTES ) : "" ;

			$ret .= "&lt;" . $ext . "&gt;" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "&lt;/" . $ext . "&gt; " ;
		}
		else # Something else entirely
		{
			$shown = htmlspecialchars ( $n , ENT_QUOTES ) ;

			$ret .= "&lt;" . $shown . "&gt;" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "&lt;/" . $shown . "&gt; " ;
		}

		$ret = "\n{$ret}\n" ;
		$ret = str_replace ( "\n\n" , "\n" , $ret ) ;
		return $ret ;
	}

	/**
	 * A function for additional debugging output: dumps this element and
	 * its subtree as nested HTML lists. All names, values and text are
	 * HTML-escaped so the dump shows the data instead of interpreting it.
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> " . htmlspecialchars ( $this->name ) . " </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ( $this->attrs as $name => $value ) {
			$ret .= htmlspecialchars ( $name ) . " => " . htmlspecialchars ( $value ) . "; " ;
		}
		$ret .= " </li>\n";
		// print children
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				$ret .= "<li> " . htmlspecialchars ( $child ) . " </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
276
// The stack with ancestral elements: the wgXML* handlers below push each
// element when its start tag is seen and pop it again at its end tag, so
// the last entry is always the innermost currently open element.
$ancStack = array();
278
279 // Three global functions needed for parsing, sorry guys
/**
 * Start-tag handler for the XML parser: creates a fresh element carrying
 * the tag's name and attributes and puts it on top of the ancestor stack.
 *
 * @param $parser the XML parser invoking the handler (not used)
 * @param $name   element name
 * @param $attrs  attributes of the element as name => value array
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$node = new element;
	$node->attrs = $attrs;
	$node->name = $name;

	// The new node stays on the stack until its end tag is seen
	array_push($ancStack, $node);
}
289
/**
 * End-tag handler for the XML parser: takes the finished element off the
 * ancestor stack and either attaches it to its parent or, when no parent
 * is left, stores it in the global $rootElem.
 *
 * @param $parser the XML parser invoking the handler (not used)
 * @param $name   element name (not used)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	$depth = count($ancStack);

	if ($depth != 0) {
		// Still inside another element: it becomes that element's child
		array_push($ancStack[$depth - 1]->children, $finished);
		return;
	}

	// Nothing left on the stack, so this was the outermost element
	$rootElem = $finished;
}
300
/**
 * Character-data handler for the XML parser: adds text to the element
 * that is currently open (the top of the ancestor stack).
 *
 * The XML parser may deliver one run of text in several calls (for
 * instance around entity references such as &amp;), so a chunk that
 * directly follows another text chunk is appended to it unchanged;
 * trimming every chunk on its own would glue words together. Only at the
 * start of a new text run is leading whitespace stripped and a
 * whitespace-only chunk dropped (blank lines between tags are no use).
 *
 * @param $parser the XML parser invoking the handler (not used)
 * @param $data   the text chunk
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Text outside of any element has no parent to go to
	if ( !$ancStack ) {
		return;
	}

	$top = count($ancStack) - 1;
	$last = count($ancStack[$top]->children) - 1;

	if ( $last >= 0 && is_string($ancStack[$top]->children[$last]) ) {
		// Continuation of the previous chunk: keep the whitespace intact
		$ancStack[$top]->children[$last] .= $data;
		return;
	}

	$data = ltrim($data);
	if ( $data !== "" ) {
		array_push($ancStack[$top]->children, $data);
	}
}
309
310
311 /**
312 * Here's the class that generates a nice tree
313 */
/**
 * Turns an XML document into a tree of element objects, using the global
 * wgXMLstartElement / wgXMLendElement / wgXMLcharacterData handlers and
 * the globals $ancStack and $rootElem they work on.
 *
 * Both scan methods terminate the script with die() on errors.
 */
class xml2php {

	/**
	 * Parses the XML file $filename in chunks of 4096 bytes.
	 *
	 * @param $filename path of the file to read
	 * @return the root element of the parsed document (global $rootElem,
	 *         returned by reference); null if no element was completed
	 */
	function &scanFile( $filename ) {
		global $ancStack, $rootElem;
		$ancStack = array();
		$rootElem = null; // never hand back the tree of an earlier parse

		$fp = fopen($filename, 'r');
		if (!$fp) {
			die('could not open XML input');
		}

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');

		while ($data = fread($fp, 4096)) {
			if (!xml_parse($xml_parser, $data, feof($fp))) {
				$message = sprintf("XML error: %s at line %d",
					xml_error_string(xml_get_error_code($xml_parser)),
					xml_get_current_line_number($xml_parser));
				// Release parser and file before giving up
				xml_parser_free($xml_parser);
				fclose($fp);
				die($message);
			}
		}
		xml_parser_free($xml_parser);
		fclose($fp);

		// return the root element stored by wgXMLendElement()
		return $rootElem;
	}

	/**
	 * Parses the XML document contained in the string $input.
	 *
	 * @param $input the complete XML document
	 * @return the root element of the parsed document
	 */
	function scanString ( $input ) {
		global $ancStack, $rootElem;
		$ancStack = array();
		$rootElem = null; // never hand back the tree of an earlier parse

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');

		if (!xml_parse ($xml_parser, $input, true)) {
			$message = sprintf ("XML error: %s at line %d",
				xml_error_string(xml_get_error_code($xml_parser)),
				xml_get_current_line_number($xml_parser));
			xml_parser_free ($xml_parser);
			die ($message);
		}
		xml_parser_free ($xml_parser);

		// return the root element stored by wgXMLendElement()
		return $rootElem;
	}

}
359
360 /* Example code:
361
362 $w = new xml2php;
363 $filename = 'sample.xml';
364 $result = $w->scanFile( $filename );
365 print $result->myPrint();
366 */
367
# Sample wikiXML document (heading, links, bold text, a nowiki extension,
# nested bullet lists and a table). It appears to be unused in this file -
# presumably kept as test input for xml2php::scanString(); TODO confirm.
$dummytext = "<article><heading level='2'> R-type </heading><paragraph><link><linktarget>image:a.jpg</linktarget><linkoption>1</linkoption><linkoption>2</linkoption><linkoption>3</linkoption><linkoption>text</linkoption></link></paragraph><paragraph>The <link><linktarget>video game</linktarget><linkoption>computer game</linkoption></link> <bold>R-type</bold> is <extension name='nowiki'>cool &amp; stuff</extension> because:</paragraph><list type='bullet'><listitem>it's nice</listitem><listitem>it's fast</listitem><listitem>it has:<list type='bullet'><listitem>graphics</listitem><listitem>sound</listitem></list></listitem></list><table><tablerow><tablecell>Version 1 </tablecell><tablecell>not bad</tablecell></tablerow><tablerow><tablecell>Version 2 </tablecell><tablecell>much better </tablecell></tablerow></table><paragraph>This is a || token in the middle of text.</paragraph></article>" ;
369
/**
 * Experimental parser that sends the wikitext through the external
 * wiki2xml command ($wgWiki2xml) and renders the resulting XML tree with
 * element::makeXHTML().
 */
class ParserXML EXTENDS Parser
{
	/**#@+
	 * @access private
	 */
	# Persistent:
	var $mTagHooks;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType,
		$mTemplates,	// cache of already loaded templates, avoids
				// multiple SQL queries for the same string
		$mTemplatePath;	// stores an unsorted hash of all the templates already loaded
				// in this path. Used for loop detection.

	var $nowikicount ;

	# Nesting depth of nowiki sections; element::makeXHTML() reads and
	# updates this property while rendering.
	var $nowiki = 0 ;

	/**#@-*/

	/**
	 * Constructor
	 *
	 * @access public
	 */
	function ParserXML() {
		$this->mTemplates = array();
		$this->mTemplatePath = array();
		$this->mTagHooks = array();
		$this->clearState();
	}

	/**
	 * Clear Parser state
	 *
	 * @access private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
	}

	/**
	 * Converts wikitext to XML with the external $wgWiki2xml command and
	 * stores, as the output text, the XML followed by the generated XHTML
	 * and the debugging dump of the tree (separated by <hr>).
	 *
	 * $title, $options, $linestart and $clearState are accepted for
	 * signature compatibility but are not used by this implementation.
	 *
	 * @param $text the wikitext to parse
	 * @return the ParserOutput object held in $this->mOutput
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
		global $wgWiki2xml ;

		# Hand the wikitext to the converter through a temporary file
		$tmpfname = tempnam ( "/tmp" , "FOO" ) ;
		$handle = $tmpfname ? fopen ( $tmpfname , "w" ) : false ;
		if ( !$handle ) {
			die ( 'could not create temporary file for wiki2xml' ) ;
		}
		fwrite ( $handle , $text ) ;
		fclose ( $handle ) ;

		# The file name is quoted so that the shell sees it as one word
		$a = array () ;
		exec ( $wgWiki2xml . " < " . escapeshellarg ( $tmpfname ) , $a ) ;
		$text = implode ( "\n" , $a ) ;

		unlink ( $tmpfname ) ;

		# Start outside of any nowiki section
		$this->nowikicount = 0 ;
		$this->nowiki = 0 ;

		$w = new xml2php;
		$result = $w->scanString( $text );
		$text .= "<hr>" . $result->makeXHTML ( $this );
		$text .= "<hr>" . $result->myPrint();

		$this->mOutput->setText ( $text ) ;
		return $this->mOutput;
	}

}
446
447 ?>